# Required packages ####
#install.packages("quantreg")
#install.packages("doParallel")
#install.packages("snow")
#install.packages("reldist")
#install.packages("rstan")
#install.packages("orthopolynom")
require(quantreg)
require(doParallel)
require(reldist)
require(orthopolynom)

# Folders ####
directory<-Sys.getenv("US_Ineq_Repl")
CHwd<-file.path(directory, "Scripts/CH")
Datawd<-file.path(directory, "Processed")
Tabwd<-file.path(directory, "Results/Tables")

# Functions #### 
setwd(CHwd)
source("CHfun.R")

# Code begins ####
meth<-"pfn"
y0<-1986
y1<-2015
tot_cores<-detectCores()
no_cores <- round(0.3*tot_cores) 
no_cores <- 5*(no_cores%/%5)+if(no_cores%%5>2){5}else{0}

D<-GetData(size=80000,boot=F,directory=Datawd,y0=y0,y1=y1)

vars<-c("lrwage3","union","pubsect","manuf","nonwhite",
        "female","partte","married","smsa",
        "exper" , "exper2" ,
        "exp_cl" , 
        paste0("expe",1:length(unique(D$Z0$exp_cl))), 
        "years_educ" , "educ_cl" , 
        paste0("educ",1:length(unique(D$Z0$educ_cl))), 
        "edex" ,
        "region",
        paste0("reg",1:length(unique(D$Z0$region))),
        "ee_cl",
        paste0("ee",1:length(unique(D$Z0$ee_cl))),
        "ind21",
        paste0("indu",1:length(unique(D$Z0$ind21))),
        "state",
        paste0("state",1:length(unique(D$Z0$state))),
        "orgwgt","year")

omit<-c("lrwage3",
        "exp_cl","expe1",
        "years_educ",
        "educ_cl","educ1",
        "ee_cl","ee1",
        "region",paste0("reg",1:length(unique(D$Z0$region))),
        "ind21", "indu1",
        "state", "state1",
        "orgwgt","year")
xvars<-vars[!vars %in% omit]

f<-paste0("lrwage3 ~ ",paste(xvars,collapse =" + ")) 
formula <- as.formula(f)

md<-lm(formula,data=D$Z0,weights=orgwgt)
summary(md)

omit0<-names(md$coefficients[is.na(md$coefficients)]) 
xvars0<-xvars[!xvars %in% omit0]
f0<-paste0("lrwage3 ~ ",paste(xvars0,collapse =" + ")) 

h<-paste("c('(Intercept)',", gsub("\\+",",",gsub(" ","'",gsub("^.*?~","",f0))),"')",sep="")
h<-eval(parse(text=h))

formula0 <- as.formula(f0)

qmd<-rq(formula0,data=D$Z0,tau=0.5,weights=orgwgt,method="pfn")
summary(qmd)

repe<-1000
p<-length(h)
Q<-c(0,0.25,0.5,0.75,1)
q<-length(Q)
probab<-D$Z0["orgwgt"]/sum(D$Z0["orgwgt"])
mu_h<-w_mean(D$Z0[,"lrwage3"],probab)

Mean<-matrix(rep(0,q*(p)),ncol=q,nrow=p)
Variance<-matrix(rep(0,q*(p)),ncol=q,nrow=p)

#set up a log file for iterations
logFile <- paste0(Datawd,"/log_ite0.txt")
cat("This is a log file for CH0 impacts ... ", file=logFile, append=FALSE, sep = "\n")
rs <- paste0(Datawd,"/results0.txt")
cat("", file=rs, append=FALSE)

start_time <- Sys.time()
res<-(1/mu_h)*Impact(n=7,w="orgwgt",mth=meth,nodes=no_cores,J=69,
                     formula=formula0,h=h,Data=D$Z0,seed=T,s=0,
                     ParRq=ParRq,
                     Legendre=Legendre,
                     polynomial.values=polynomial.values)
end_time <- Sys.time()
di<-end_time - start_time
cat(paste0('time = ',round(di,2),' ',di%.%units),'\n')


require(doParallel)
cls <- makeCluster(2, type = "SOCK")
registerDoParallel(cls)
st <- Sys.time()

results<-foreach(rr=1:repe,.combine='rbind',.inorder=F)%dopar%{
  start_time <- Sys.time()
  impact<-(1/mu_h)*Impact(n=7,w="orgwgt",mth=meth,nodes=no_cores,J=69,
                          formula=formula0,h=h,Data=D$Z0,seed=T,s=rr,
                          ParRq=ParRq,
                          Legendre=Legendre,
                          polynomial.values=polynomial.values)
  write.table(impact,file=rs,append=T,row.names=F,col.names=F)
  end_time <- Sys.time()
  res<-rbind(res,impact)
  di<-end_time - start_time
  cat("iteration = ", rr, "\n",file=logFile, append=TRUE)
  cat(paste0('time = ',round(di,2),' ',di%.%units),'\n',
      file=logFile, append=TRUE)
  impact
}
et <- Sys.time()
dis<-et - st
cat(paste0('time = ',round(dis,2),' ',dis%.%units),'\n')
stopCluster(cls)

results<-read.table(file=rs)
names(results)<-colnames(res)

#one additional repetition
repe<-nrow(results)/5

alpha<-0.05
for (qq in 1:q){
  index<-seq(from = qq, to = repe*(q), by = q)
  Mean[,qq]<-colMeans(results[index,])
  t<-paste("CI_Q",qq,"<-apply(results[index,],2,quantile,probs=c(alpha/2,0.5,1-alpha/2))",sep="")
  eval(parse(text=t))
  t<-paste("colnames(CI_Q",qq,")<-h",sep="")
  eval(parse(text=t))
  Variance[,qq]=colVars(results[index,])
  nam <- paste("Q", qq, sep = "")
  assign(nam,results[index,])
  t<-paste("colnames(Q",qq,")<-h",sep="")
  eval(parse(text=t))
}

rownames(Mean)<-h
colnames(Mean)<-c("Q1","Q2","Q3","Q4","Total")
rownames(Variance)<-h
colnames(Variance)<-c("Q1","Q2","Q3","Q4","Total")
t(CI_Q5)

setwd(Datawd)
rm(D)
save.image("CH0.RData")

